In [34]:
# convert another data type into a string
s = str(42)
s
Out[34]:
In [35]:
# convert to a string with 2 decimal places
'{:.2f}'.format(3.14159)
Out[35]:
In [36]:
s='I like you'
s[0]
Out[36]:
In [37]:
len(s)
Out[37]:
String slicing is like list slicing:
In [38]:
s[:6]
Out[38]:
In [39]:
s[7:]
Out[39]:
In [40]:
s[-1]
Out[40]:
In [41]:
s[2:-1:2]
Out[41]:
Using the find function
In [42]:
# returns the index of the first occurrence (-1 if not found)
s.find('like')
Out[42]:
In [43]:
# returns -1 if not found
s.find('hate')
Out[43]:
Check start of string
In [44]:
s.startswith('I') # also endswith
Out[44]:
In [45]:
# startswith/endswith accept a tuple of candidates and succeed if any one of them matches
places = ['france', 'http://france.com', 'USA', 'https://usa.com']
[
    place
    for place in places
    if place.startswith(('http://', 'https://'))
]
Out[45]:
fnmatch matches strings using Unix shell-style wildcards
In [46]:
# use this when you just want to add a simple wildcard
from fnmatch import fnmatch, fnmatchcase
# fnmatch follows the case sensitivity of the OS; fnmatchcase is always case-sensitive
names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']
# keep only the names that match the wildcard pattern
[name for name in names if fnmatch(name, 'Dat*.csv')]
Out[46]:
In [47]:
# checks if every character in the string is a digit
s.isdigit()
Out[47]:
These functions return a new string. The original is unmodified.
In [48]:
s.lower() # also upper()
Out[48]:
In [49]:
# replaces all instances of 'like' with 'love'
s.replace('like', 'love')
Out[49]:
In [50]:
# remove leading and trailing whitespace
s5 = ' ham and cheese '
s5.strip()
Out[50]:
In [51]:
s5.lstrip()
Out[51]:
In [52]:
s5.rstrip()
Out[52]:
Split a string:
In [53]:
# split a string into a list of substrings separated by a delimiter
s.split(' ')
Out[53]:
In [54]:
# with no argument, split() splits on runs of any whitespace and drops empty strings
s.split()
Out[54]:
Using regular expressions to split with multiple delimiters and whitespace handling. See Regular Expressions for more on regular expressions.
In [55]:
# splitting with a regex allows using multiple delimiters and whitespace handling
import re
line = 'asdf fjdk; afed, fjek,asdf, foo'
# delimiter = one of ';', ',' or a whitespace character, followed by any amount of whitespace
re.split(r'[;,\s]\s*', line)
Out[55]:
In [56]:
# don't use capturing groups (parentheses), or the delimiters are also returned as fields in the split result
re.split(r'(;|,|\s)\s*', line)
Out[56]:
In [57]:
# instead use non-capturing groups / parenthesis
re.split(r'(?:,|;|\s)\s*', line) # note the '(?:' starting the group, which denotes a non-capturing group
Out[57]:
Join / concatenate strings
In [102]:
# join a list of strings into one string using a delimiter
stooges = ['larry', 'curly', 'moe']
' '.join(stooges)
Out[102]:
In [103]:
# stringify every item and join the pieces in a single expression
data = ['ACME', 50, 91.1]
','.join(map(str, data))
Out[103]:
In [80]:
#string concatenation
s + ' frank'
Out[80]:
In [83]:
# using a dictionary-based translation table and translate() to map chars in a string
# (str.maketrans converts the single-character keys to their code points)
remap = str.maketrans({
    '\t': ' ',
    '\f': ' ',
    '\r': None,  # a None value deletes the char from the translated string
    '\n': None,
})
s = 'python\fis\tawesome\r\n'
s.translate(remap)
Out[83]:
In [60]:
# old way
'raining %s and %s' % ('cats', 'dogs')
Out[60]:
In [61]:
# new way
'raining {} and {}'.format('cats', 'dogs')
Out[61]:
In [62]:
#new way (using named arguments)
'raining {animal1} and {animal2}'.format(animal1='cats', animal2='dogs')
Out[62]:
Formatting parameters (more examples):
In [1]:
# use 2 decimal places
'pi is {:.2f}'.format(3.14159)
Out[1]:
In [3]:
# adding a thousands separator to the format
'average hieght is {:,.2f} mm'.format(45678789.7653)
Out[3]:
text justification
In [85]:
# useful when outputting fixed-width text records
text = 'Hello World'
text.ljust(20)
Out[85]:
In [86]:
text.rjust(20)
Out[86]:
In [87]:
text.center(20)
Out[87]:
In [88]:
# fill with a specific char
text.center(20,'*')
Out[88]:
using format for text justification
In [89]:
format(text, '>20')
Out[89]:
In [90]:
format(text, '<20')
Out[90]:
In [91]:
format(text, '^20')
Out[91]:
In [97]:
# fill with a specific character
format(text, '*^20s')
Out[97]:
In [98]:
#format works with non strings
x = 1.2345
format(x, '*^10')
Out[98]:
In [104]:
# format is useful when formatting multiple values or non-strings
'{:>10s}{:>10s}{:*>10f}'.format('Hello', 'World', 1.23)
Out[104]:
using variable names in format strings
In [105]:
# using named parameters
s = '{name} has {n} messages.'
s.format(name='Guido', n=37)
Out[105]:
In [106]:
# using local variables
name = 'Guido'
n = 37
s.format_map(vars())
Out[106]:
In [108]:
#using variables in a class instance
class Info:
    """Minimal record type; its instance attributes are exposed via vars()."""

    def __init__(self, name, n):
        """Store ``name`` and ``n`` as instance attributes of the same names."""
        self.name = name
        self.n = n
info = Info('Guido',37)
s.format_map(vars(info))
Out[108]:
In [64]:
# normal strings allow for escaped characters
print('first line\nsecond line')
In [65]:
# raw strings treat backslashes as literal characters
print(r'first line\nsecond line')
In [66]:
import re
text1 = '11/27/2012'
text2 = 'Nov 27, 2012'
# use a raw string to define reg exps, so backslashes reach the regex engine untouched
# re.match only matches at the beginning of the string
m = re.match(r'\d+/\d+/\d+', text1)
if m:
    print(m.group())  # returns the entire matched string
else:
    print('no match')
If you are going to reuse the expression a bunch, precompile it
In [67]:
# compile once; the pattern object exposes the same match/search methods as the re module
datepat = re.compile(r'\d+/\d+/\d+')
# match() returns a match object (truthy) on success and None otherwise
if datepat.match(text1):
    print('yes')
else:
    print('no')
Capture groups
In [68]:
datepat = re.compile(r'(\d+)/(\d+)/(\d+)') # parentheses define capture groups.
m = datepat.match(text1)
# with no argument, group() returns the whole match (same as group(0))
m.group()
Out[68]:
In [69]:
m.group(0)
Out[69]:
In [70]:
m.group(1)
Out[70]:
In [71]:
m.group(2)
Out[71]:
In [72]:
m.group(3)
Out[72]:
In [73]:
m.groups() # returns a tuple
Out[73]:
finding all matches
In [74]:
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
datepat.findall(text) # returns a list of tuples
Out[74]:
In [75]:
# finditer is like findall, but returns an iterator that yields match objects one at a time
for m in datepat.finditer(text):
    print(m.groups())
replacing strings with reg exps
In [76]:
# the backslashes in the replacement string reference capture groups
re.sub(r'(\d+)/(\d+)/(\d+)', r'\3-\1-\2', text)
Out[76]:
In [77]:
# using a callback function to compute the replacement for each match
def change_date(m):
    """Return the replacement text for one match.

    ``m`` is a match object with at least three capture groups; the result is
    groups 2, 1 and 3 (in that order) joined with '|'.
    """
    return '{}|{}|{}'.format(m.group(2), m.group(1), m.group(3))
datepat.sub(change_date, text)
Out[77]:
Case insensitive search with reg exps
In [78]:
text = 'UPPER PYTHON, lower python, Mixed Python'
re.findall('python', text, flags=re.IGNORECASE)
Out[78]:
In [110]:
# note that case is not carried through in a case-insensitive replace.
re.sub('python', 'snake', text, flags=re.IGNORECASE)
Out[110]:
Converting to entities
In [111]:
s = 'Elements are written as "<tag>text</tag>".'
import html
# escape() replaces &, < and > with HTML entities; quotes are escaped too by default
html.escape(s)
Out[111]:
In [113]:
# disable escaping of quotes
html.escape(s, quote=False)
Out[113]:
Converting from entities
In [115]:
# xml entities: unescape() turns &amp;, &lt; and &gt; back into the literal characters
t = 'The prompt is &gt;&gt;&gt;'
from xml.sax.saxutils import unescape
unescape(t)
Out[115]:
In [118]:
# html: html.unescape() converts named and numeric character references back to text
s = 'Spicy &quot;Jalape&#241;o&quot;.'
import html
# use the public html.unescape; `import html` alone does not load the html.parser submodule
html.unescape(s)
Out[118]: